## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Read data

source(paste0(root.dir, '../step2/scripts/myRLib.R'))
intermediate.files <- strsplit('../step2/output-direction_XT_YI/fastGlu__wtccc_t2d__wtccc_ctrl/covar_LDpred_p1.0000e+00.txt:../step2/output-direction_XT_YI/fastGlu__wtccc_t2d__wtccc_ctrl/covar_LDpred_p3.0000e-01.txt:../step2/output-direction_XT_YI/fastGlu__wtccc_t2d__wtccc_ctrl/covar_LDpred_p1.0000e-01.txt', ':')[[1]]
ps <- strsplit('1,0.3,0.1', ',')[[1]]
expression.files <- strsplit('output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Artery_Coronary_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/DGN-HapMap-2015_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Whole_Blood_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Cells_EBV-transformed_lymphocytes_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Thyroid_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Stomach_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Small_Intestine_Terminal_Ileum_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Esophagus_Muscularis_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Spleen_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Pancreas_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Esophagus_Gastroesophageal_Junction_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Adrenal_Gland_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Adipose_Visceral_Omentum_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Adipose_Subcutaneous_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Pituitary_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Colon_Transverse_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Esophagus_Mucosa_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Liver_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Lung_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Muscle_Skeletal_predicted_expression.txt:output-run_step3/fastGlu__wtccc_t2d__wtccc_ctrl/Cells_Transformed_fibroblasts_predicted_expression.txt', ':')[[1]]
ts <- strsplit('Artery_Coronary,DGN-HapMap-2015,Whole_Blood,Cells_EBV-transformed_lymphocytes,Thyroid,Stomach,Small_Intestine_Terminal_Ileum,Esophagus_Muscularis,Spleen,Pancreas,Esophagus_Gastroesophageal_Junction,Adrenal_Gland,Adipose_Visceral_Omentum,Adipose_Subcutaneous,Pituitary,Colon_Transverse,Esophagus_Mucosa,Liver,Lung,Muscle_Skeletal,Cells_Transformed_fibroblasts', ',')[[1]]
gene <- read.table(getFile(root.dir, 'output-prepare_gene_id/fastGlu__wtccc_t2d__wtccc_ctrl.genelist'), header = F, sep = '\t')
colnames(gene) <- c('gene.name', 'gencode.id')
yi <- read.table(getFile(root.dir, '../step2/output-direction_XT_YI/fastGlu__wtccc_t2d__wtccc_ctrl/yi.logistic.assoc'), header = T)
cols <- colnames(yi)
yi.filenames <- apply(yi, 1, function(x) { return(basename(x[1]))})
yi.container <- c()
expression.list <- list()
for(i in 1 : length(ts)) {
    expression.list[[ts[i]]] <- read.table(getFile(root.dir, expression.files[i]), header = T, sep = '  ')
}
intermediate.list <- list()
for(i in 1 : length(ps)) {
    thisfile <- basename(intermediate.files[i])
    yi.container <- rbind(yi.container, c(ps[i], yi[yi.filenames == thisfile, 2:7]))
    intermediate.list[[ps[i]]] <- read.table(getFile(root.dir, intermediate.files[i]), header = T, sep = ' ')
    intermediate.list[[ps[i]]]$PHENO <- intermediate.list[[ps[i]]]$PHENO - 1
}
yi.container <- data.frame(yi.container)
colnames(yi.container) <- c('causal.fraction', cols[2:7])
pander(gene, knitr.auto.asis = T)
## 
## --------------------------------
##  gene.name       gencode.id     
## ----------- --------------------
##     IDE      ENSG00000119912.11 
## 
##    CIDEB     ENSG00000136305.7  
## 
##    CIDEA     ENSG00000176194.13 
## 
##   CIDECP     ENSG00000186162.6  
## 
##    CIDEC     ENSG00000187288.6  
## 
##    HHEX      ENSG00000152804.6  
## 
##    TTLL5     ENSG00000119685.15 
## 
##   CCDC33     ENSG00000140481.9  
## --------------------------------

Marginal effect of gene expression on phenotype \(Y \sim E\)

ye.container <- c()
inter <- intermediate.list[[ps[i]]]
temp.container <- c()
for (t in names(expression.list)) {
    expre <- expression.list[[t]]
    df <- inner_join(inter, expre, by = "IID")
    for (g in gene[, 2]) {
        if (sum(abs(expre[, g])) != 0) {
            this.expre <- scale(expre[, g])
        } else {
            this.expre <- expre[, g]
        }
        data <- data.frame(y = inter$PHENO, e = this.expre)
        model <- glm(y ~ e, family = binomial(link = "logit"), 
            data = data)
        stats <- summary(model)
        temp.container <- rbind(temp.container, c(t, g, stats$coefficients[1], 
            stats$coefficients[3], stats$coefficients[7], stats$coefficients[2], 
            stats$coefficients[4], stats$coefficients[8]))
    }
}
temp.container <- data.frame(temp.container)
colnames(temp.container) <- c("tissue", "gene", "intercept.estmate", 
    "intercept.std", "intercept.pval", "E.estmate", "E.std", 
    "E.pval")
temp.container <- left_join(temp.container, gene, by = c(gene = "gencode.id"))
temp.container <- cleanDF(temp.container, c("intercept.estmate", 
    "intercept.std", "intercept.pval", "E.estmate", "E.std", 
    "E.pval"))
ncol <- length(unique(temp.container[!is.na(temp.container$E.pval), 
    "gene.name"]))/3
if (ncol == floor(ncol)) {
    ncol <- floor(ncol)
} else {
    ncol <- floor(ncol) + 1
}
p <- ggplot(temp.container[!is.na(temp.container$E.pval), ], 
    aes(x = tissue)) + geom_point(aes(y = E.estmate)) + geom_errorbar(aes(ymin = E.estmate - 
    1.96 * E.std, ymax = E.estmate + 1.96 * E.std), colour = "black", 
    width = 0.1) + geom_hline(yintercept = 0, color = "red") + 
    coord_flip() + theme(axis.text.x = element_text(angle = 90, 
    hjust = 1)) + geom_text(aes(y = E.estmate, label = paste0("pval = ", 
    formatC(E.pval, format = "e", digits = 2))), vjust = -0.5, 
    hjust = 0.5, size = 4) + ggtitle("Y ~ E: correlation estimate") + 
    ylab("Regression coefficient") + xlab("Expression Tissue") + 
    facet_wrap(~gene.name, scales = "free_x", ncol = 3)
subchunkify(p, fig_asp = ncol)

ye.container <- rbind(ye.container, temp.container)

Marginal effect of gene expression on intermediate trait \(I \sim E\)

ie.container <- c()
for (i in 1:length(intermediate.list)) {
    cat("##", ps[i])
    cat("\n")
    cat("\n")
    inter <- intermediate.list[[ps[i]]]
    temp.container <- c()
    for (t in names(expression.list)) {
        expre <- expression.list[[t]]
        df <- inner_join(inter, expre, by = "IID")
        for (g in gene[, 2]) {
            if (sum(abs(expre[, g])) != 0) {
                this.expre <- scale(expre[, g])
            } else {
                this.expre <- expre[, g]
            }
            data <- data.frame(i = inter$PRS, e = this.expre)
            model <- lm(i ~ e, data = data)
            stats <- summary(model)
            temp.container <- rbind(temp.container, c(ps[i], 
                t, g, stats$coefficients[1], stats$coefficients[3], 
                stats$coefficients[7], stats$coefficients[2], 
                stats$coefficients[4], stats$coefficients[8]))
        }
    }
    temp.container <- data.frame(temp.container)
    colnames(temp.container) <- c("causal.fraction", "tissue", 
        "gene", "intercept.estmate", "intercept.std", "intercept.pval", 
        "E.estmate", "E.std", "E.pval")
    temp.container <- left_join(temp.container, gene, by = c(gene = "gencode.id"))
    temp.container <- cleanDF(temp.container, c("intercept.estmate", 
        "intercept.std", "intercept.pval", "E.estmate", "E.std", 
        "E.pval"))
    ncol <- length(unique(temp.container[!is.na(temp.container$E.pval), 
        "gene.name"]))/3
    if (ncol == floor(ncol)) {
        ncol <- floor(ncol)
    } else {
        ncol <- floor(ncol) + 1
    }
    p <- ggplot(temp.container[!is.na(temp.container$E.pval), 
        ], aes(x = tissue)) + geom_point(aes(y = E.estmate)) + 
        geom_errorbar(aes(ymin = E.estmate - 1.96 * E.std, ymax = E.estmate + 
            1.96 * E.std), colour = "black", width = 0.1) + geom_hline(yintercept = 0, 
        color = "red") + coord_flip() + theme(axis.text.x = element_text(angle = 90, 
        hjust = 1)) + geom_text(aes(y = E.estmate, label = paste0("pval = ", 
        formatC(E.pval, format = "e", digits = 2))), vjust = -0.5, 
        hjust = 0.5, size = 4) + ggtitle("I ~ E: correlation estimate") + 
        ylab("Regression coefficient") + xlab("Expression Tissue") + 
        facet_wrap(~gene.name, scales = "free_x", ncol = 3)
    subchunkify(p, fig_asp = ncol)
    ie.container <- rbind(ie.container, temp.container)
    cat("\n\n")
}

1

0.3

0.1

Marginal effect of gene expression and intermediate trait on phenotype \(Y \sim E + I\)

yei.container <- c()
for (i in 1:length(intermediate.list)) {
    cat("##", ps[i])
    cat("\n")
    cat("\n")
    inter <- intermediate.list[[ps[i]]]
    temp.container <- c()
    for (t in names(expression.list)) {
        expre <- expression.list[[t]]
        df <- inner_join(inter, expre, by = "IID")
        for (g in gene[, 2]) {
            if (sum(abs(expre[, g])) != 0) {
                this.expre <- scale(expre[, g])
            } else {
                this.expre <- expre[, g]
            }
            data <- data.frame(y = inter$PHENO, i = inter$PRS, 
                e = this.expre)
            model <- glm(y ~ e + i, family = binomial(link = "logit"), 
                data = data)
            stats <- summary(model)
            temp.container <- rbind(temp.container, c(ps[i], 
                t, g, stats$coefficients[2], stats$coefficients[5], 
                stats$coefficients[11], stats$coefficients[3], 
                stats$coefficients[6], stats$coefficients[12]))
        }
    }
    temp.container <- data.frame(temp.container)
    colnames(temp.container) <- c("causal.fraction", "tissue", 
        "gene", "E.estmate", "E.std", "E.pval", "I.estmate", 
        "I.std", "I.pval")
    temp.container <- left_join(temp.container, gene, by = c(gene = "gencode.id"))
    temp.container <- cleanDF(temp.container, c("I.estmate", 
        "I.std", "I.pval", "E.estmate", "E.std", "E.pval"))
    ncol <- length(unique(temp.container[!is.na(temp.container$I.pval), 
        "gene.name"]))/3
    if (ncol == floor(ncol)) {
        ncol <- floor(ncol)
    } else {
        ncol <- floor(ncol) + 1
    }
    p1 <- ggplot(temp.container[!is.na(temp.container$I.pval), 
        ], aes(x = tissue)) + geom_point(aes(y = I.estmate)) + 
        geom_errorbar(aes(ymin = I.estmate - 1.96 * I.std, ymax = I.estmate + 
            1.96 * I.std), colour = "black", width = 0.1) + geom_hline(yintercept = 0, 
        color = "red") + geom_hline(yintercept = yi.container[yi.container$causal.fraction == 
        ps[i], "prs.estmate"][[1]], color = "blue") + coord_flip() + 
        theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
        geom_text(aes(y = I.estmate, label = paste0("pval = ", 
            formatC(E.pval, format = "e", digits = 2))), vjust = -0.5, 
            hjust = 0.3, size = 4) + ggtitle("Y ~ E + I (I): correlation estimate") + 
        ylab("Regression coefficient") + xlab("Expression Tissue") + 
        facet_wrap(~gene.name, scales = "free_x", ncol = 3)
    subchunkify(p1, fig_asp = ncol)
    
    temp <- ye.container[, c("tissue", "gene.name", "E.estmate", 
        "E.std", "E.pval")]
    temp$marginal <- T
    temp2 <- temp.container[, c("tissue", "gene.name", "E.estmate", 
        "E.std", "E.pval")]
    temp2$marginal <- F
    plot.e <- rbind(temp2, temp)
    ncol <- length(unique(plot.e[!is.na(plot.e$E.pval), "gene.name"]))/3
    if (ncol == floor(ncol)) {
        ncol <- floor(ncol)
    } else {
        ncol <- floor(ncol) + 1
    }
    dodge <- position_dodge(width = 0.5)
    p2 <- ggplot(plot.e[!is.na(plot.e$E.pval), ], aes(x = tissue, 
        color = marginal)) + geom_point(aes(y = E.estmate), position = dodge) + 
        geom_errorbar(aes(ymin = E.estmate - 1.96 * E.std, ymax = E.estmate + 
            1.96 * E.std), width = 0.1, position = dodge) + geom_hline(yintercept = 0, 
        color = "black") + coord_flip() + theme(axis.text.x = element_text(angle = 90, 
        hjust = 1)) + geom_text(aes(y = E.estmate, label = paste0("pval = ", 
        formatC(E.pval, format = "e", digits = 2)), vjust = -0.3, 
        hjust = 0.3), size = 3, position = dodge) + ggtitle("Y ~ E + I (E): correlation estimate") + 
        ylab("Regression coefficient") + xlab("Expression Tissue") + 
        facet_wrap(~gene.name, scales = "free_x", ncol = 3)
    subchunkify(p2, fig_asp = ncol)
    yei.container <- rbind(yei.container, temp.container)
    cat("\n\n")
}

1

0.3

0.1